import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
df=pd.read_csv('1976-2020-president.csv')
df
| year | state | state_po | state_fips | state_cen | state_ic | office | candidate | party_detailed | writein | candidatevotes | totalvotes | version | notes | party_simplified | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | CARTER, JIMMY | DEMOCRAT | False | 659170 | 1182850 | 20210113 | NaN | DEMOCRAT |
| 1 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | FORD, GERALD | REPUBLICAN | False | 504070 | 1182850 | 20210113 | NaN | REPUBLICAN |
| 2 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | MADDOX, LESTER | AMERICAN INDEPENDENT PARTY | False | 9198 | 1182850 | 20210113 | NaN | OTHER |
| 3 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | BUBAR, BENJAMIN ""BEN"" | PROHIBITION | False | 6669 | 1182850 | 20210113 | NaN | OTHER |
| 4 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | HALL, GUS | COMMUNIST PARTY USE | False | 1954 | 1182850 | 20210113 | NaN | OTHER |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4282 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | JORGENSEN, JO | LIBERTARIAN | False | 5768 | 278503 | 20210113 | NaN | LIBERTARIAN |
| 4283 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | PIERCE, BROCK | INDEPENDENT | False | 2208 | 278503 | 20210113 | NaN | OTHER |
| 4284 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | NaN | NaN | True | 1739 | 278503 | 20210113 | NaN | OTHER |
| 4285 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | OVERVOTES | NaN | False | 279 | 278503 | 20210113 | NaN | OTHER |
| 4286 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | UNDERVOTES | NaN | False | 1459 | 278503 | 20210113 | NaN | OTHER |
4287 rows × 15 columns
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4287 entries, 0 to 4286 Data columns (total 15 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 year 4287 non-null int64 1 state 4287 non-null object 2 state_po 4287 non-null object 3 state_fips 4287 non-null int64 4 state_cen 4287 non-null int64 5 state_ic 4287 non-null int64 6 office 4287 non-null object 7 candidate 4000 non-null object 8 party_detailed 3831 non-null object 9 writein 4284 non-null object 10 candidatevotes 4287 non-null int64 11 totalvotes 4287 non-null int64 12 version 4287 non-null int64 13 notes 0 non-null float64 14 party_simplified 4287 non-null object dtypes: float64(1), int64(7), object(7) memory usage: 502.5+ KB
df.describe()
| year | state_fips | state_cen | state_ic | candidatevotes | totalvotes | version | notes | |
|---|---|---|---|---|---|---|---|---|
| count | 4287.000000 | 4287.000000 | 4287.000000 | 4287.000000 | 4.287000e+03 | 4.287000e+03 | 4287.0 | 0.0 |
| mean | 1999.080942 | 28.616982 | 53.665034 | 39.754840 | 3.119076e+05 | 2.366924e+06 | 20210113.0 | NaN |
| std | 14.220014 | 15.616459 | 26.029189 | 22.772216 | 7.648011e+05 | 2.465008e+06 | 0.0 | NaN |
| min | 1976.000000 | 1.000000 | 11.000000 | 1.000000 | 0.000000e+00 | 1.235740e+05 | 20210113.0 | NaN |
| 25% | 1988.000000 | 16.000000 | 33.000000 | 22.000000 | 1.177000e+03 | 6.522740e+05 | 20210113.0 | NaN |
| 50% | 2000.000000 | 28.000000 | 53.000000 | 42.000000 | 7.499000e+03 | 1.569180e+06 | 20210113.0 | NaN |
| 75% | 2012.000000 | 41.000000 | 81.000000 | 61.000000 | 1.992415e+05 | 3.033118e+06 | 20210113.0 | NaN |
| max | 2020.000000 | 56.000000 | 95.000000 | 82.000000 | 1.111025e+07 | 1.750088e+07 | 20210113.0 | NaN |
df.columns
Index(['year', 'state', 'state_po', 'state_fips', 'state_cen', 'state_ic',
'office', 'candidate', 'party_detailed', 'writein', 'candidatevotes',
'totalvotes', 'version', 'notes', 'party_simplified'],
dtype='object')
#Fill all the null values with NO_Data
df.fillna('NO_DATA',inplace=True)
df
| year | state | state_po | state_fips | state_cen | state_ic | office | candidate | party_detailed | writein | candidatevotes | totalvotes | version | notes | party_simplified | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | CARTER, JIMMY | DEMOCRAT | False | 659170 | 1182850 | 20210113 | NO_DATA | DEMOCRAT |
| 1 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | FORD, GERALD | REPUBLICAN | False | 504070 | 1182850 | 20210113 | NO_DATA | REPUBLICAN |
| 2 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | MADDOX, LESTER | AMERICAN INDEPENDENT PARTY | False | 9198 | 1182850 | 20210113 | NO_DATA | OTHER |
| 3 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | BUBAR, BENJAMIN ""BEN"" | PROHIBITION | False | 6669 | 1182850 | 20210113 | NO_DATA | OTHER |
| 4 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | HALL, GUS | COMMUNIST PARTY USE | False | 1954 | 1182850 | 20210113 | NO_DATA | OTHER |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4282 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | JORGENSEN, JO | LIBERTARIAN | False | 5768 | 278503 | 20210113 | NO_DATA | LIBERTARIAN |
| 4283 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | PIERCE, BROCK | INDEPENDENT | False | 2208 | 278503 | 20210113 | NO_DATA | OTHER |
| 4284 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | NO_DATA | NO_DATA | True | 1739 | 278503 | 20210113 | NO_DATA | OTHER |
| 4285 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | OVERVOTES | NO_DATA | False | 279 | 278503 | 20210113 | NO_DATA | OTHER |
| 4286 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | UNDERVOTES | NO_DATA | False | 1459 | 278503 | 20210113 | NO_DATA | OTHER |
4287 rows × 15 columns
state=df['state'].unique()
p=px.histogram(df,x='state',y='totalvotes',title='State Wise Total Votes')
p.show()
plt.hist(df['totalvotes'])
plt.title('total_votes in election')
plt.show()
plt.hist(df['candidatevotes'])
plt.title('candidate_votes in election')
plt.show()
data=df.groupby(['year','party_simplified'])['party_simplified'].count().unstack()
plt.figure(figsize=(10,6))
data.plot(kind='bar',stacked=True)
<AxesSubplot:xlabel='year'>
<Figure size 720x432 with 0 Axes>
#Voters Turn out
voters_number=df.groupby('year')['totalvotes'].sum()
x=voters_number.index
y=voters_number.values
plt.figure(figsize=(10,6))
plt.plot(x,y)
plt.xlabel('Year')
plt.ylabel('Total number of votes casted in Year')
plt.title('Voter Turn Out ')
plt.show()
parties=list(df['party_simplified'].unique())
parties
['DEMOCRAT', 'REPUBLICAN', 'OTHER', 'LIBERTARIAN']
df_year_party=pd.DataFrame(columns=["year"] + parties)
df_year_party
| year | DEMOCRAT | REPUBLICAN | OTHER | LIBERTARIAN |
|---|
for year in df.year.unique():
row = {"year": year}
for party in parties:
row[party] = df[(df.year == year) & (df.party_simplified == party)].candidatevotes.sum()
df_year_party = df_year_party.append(row, ignore_index=True)
/var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead. /var/folders/j9/nxw6tr6x2630lrfnp_vmnv0m0000gn/T/ipykernel_30278/2695169755.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.
df_year_party["votes_casted"] = df_year_party[parties].sum(axis=1)
df_year_party = df_year_party.astype(np.uint32)
df_year_party.year = df_year_party.year.astype(np.uint16)
df_year_party.set_index('year', inplace=True)
df_year_party
| DEMOCRAT | REPUBLICAN | OTHER | LIBERTARIAN | votes_casted | |
|---|---|---|---|---|---|
| year | |||||
| 1976 | 40680446 | 38870893 | 1954379 | 95626 | 81601344 |
| 1980 | 35480948 | 43642639 | 6505863 | 867401 | 86496851 |
| 1984 | 37449813 | 54166829 | 811015 | 227204 | 92654861 |
| 1988 | 41716679 | 48642640 | 817798 | 409708 | 91586825 |
| 1992 | 44856747 | 38798913 | 20663272 | 280848 | 104599780 |
| 1996 | 47295351 | 39003697 | 9625419 | 465351 | 96389818 |
| 2000 | 50830580 | 50311372 | 4071625 | 380405 | 105593982 |
| 2004 | 58894561 | 61872711 | 1212870 | 369308 | 122349450 |
| 2008 | 69338846 | 59613835 | 1956116 | 510456 | 131419253 |
| 2012 | 65752017 | 60670117 | 1501463 | 1216400 | 129139997 |
| 2016 | 65677288 | 62692670 | 4292059 | 4125170 | 136787187 |
| 2020 | 81268908 | 74216146 | 1246094 | 1797355 | 158528503 |
#Total number of votes
df['totalvotes'].sum()
10147003830
#Function to extract the data of specific party
def extract_party_data(party_name):
t=df['party_simplified']==party_name
df_new=df[t]
return df_new
DEMOCRATIC PARTY ANALYSIS
dm=extract_party_data('DEMOCRAT')
dm
| year | state | state_po | state_fips | state_cen | state_ic | office | candidate | party_detailed | writein | candidatevotes | totalvotes | version | notes | party_simplified | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1976 | ALABAMA | AL | 1 | 63 | 41 | US PRESIDENT | CARTER, JIMMY | DEMOCRAT | False | 659170 | 1182850 | 20210113 | NO_DATA | DEMOCRAT |
| 8 | 1976 | ALASKA | AK | 2 | 94 | 81 | US PRESIDENT | CARTER, JIMMY | DEMOCRAT | False | 44058 | 123574 | 20210113 | NO_DATA | DEMOCRAT |
| 12 | 1976 | ARIZONA | AZ | 4 | 86 | 61 | US PRESIDENT | CARTER, JIMMY | DEMOCRAT | False | 295602 | 742719 | 20210113 | NO_DATA | DEMOCRAT |
| 19 | 1976 | ARKANSAS | AR | 5 | 71 | 42 | US PRESIDENT | CARTER, JIMMY | DEMOCRAT | False | 498604 | 767535 | 20210113 | NO_DATA | DEMOCRAT |
| 24 | 1976 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | CARTER, JIMMY | DEMOCRAT | False | 3742284 | 7803770 | 20210113 | NO_DATA | DEMOCRAT |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 4252 | 2020 | VIRGINIA | VA | 51 | 54 | 40 | US PRESIDENT | BIDEN, JOSEPH R. JR | DEMOCRAT | False | 2413568 | 4460524 | 20210113 | NO_DATA | DEMOCRAT |
| 4256 | 2020 | WASHINGTON | WA | 53 | 91 | 73 | US PRESIDENT | BIDEN, JOSEPH R. JR | DEMOCRAT | False | 2369612 | 4087631 | 20210113 | NO_DATA | DEMOCRAT |
| 4263 | 2020 | WEST VIRGINIA | WV | 54 | 55 | 56 | US PRESIDENT | BIDEN, JOSEPH R. JR | DEMOCRAT | False | 235984 | 794652 | 20210113 | NO_DATA | DEMOCRAT |
| 4267 | 2020 | WISCONSIN | WI | 55 | 35 | 25 | US PRESIDENT | BIDEN, JOSEPH R. JR | DEMOCRAT | False | 1630866 | 3298041 | 20210113 | NO_DATA | DEMOCRAT |
| 4280 | 2020 | WYOMING | WY | 56 | 83 | 68 | US PRESIDENT | BIDEN, JOSEPH R. JR | DEMOCRAT | False | 73491 | 278503 | 20210113 | NO_DATA | DEMOCRAT |
615 rows × 15 columns
#Total Votes of democratic party
democratic_votes=dm['totalvotes'].sum()
democratic_votes
1344886700
#Candidates who took part in presidential election in democratic party
p=dm['candidate'].unique()
p
array(['CARTER, JIMMY', 'MONDALE, WALTER', 'DUKAKIS, MICHAEL',
'CLINTON, BILL', 'GORE, AL', 'KERRY, JOHN', 'OTHER',
'OBAMA, BARACK H.', 'CLINTON, HILLARY', 'NO_DATA',
'BIDEN, JOSEPH R. JR'], dtype=object)
#Calculate the total number of votes obtained for each candidate and get the top 5
x=dm.groupby('candidate')
plt.figure(figsize=(10,6))
plt.xlabel('Candidate from party')
plt.ylabel('Total number of votes obtained')
x['totalvotes'].sum().sort_values(ascending=False).head(5).plot(kind='bar',title='Total number of votes obtained for each candidate ')
<AxesSubplot:title={'center':'Total number of votes obtained for each candidate '}, xlabel='candidate', ylabel='Total number of votes obtained'>
#Calculate the total number of votes obtained as per state and get the top 5
x1=dm.groupby('state')
plt.figure(figsize=(10,6))
plt.xlabel('State')
plt.ylabel('Total number of votes obtained')
x1['totalvotes'].sum().sort_values(ascending=False).head(5).plot(kind='bar',title='Total number of votes obtained from a state ')
<AxesSubplot:title={'center':'Total number of votes obtained from a state '}, xlabel='state', ylabel='Total number of votes obtained'>
x2=dm.groupby('year')
plt.figure(figsize=(10,6))
plt.xlabel('Year')
plt.ylabel('Total number of votes obtained')
x2['totalvotes'].sum().sort_values(ascending=False).plot(kind='bar',title='Total number of votes obtained from a state ')
<AxesSubplot:title={'center':'Total number of votes obtained from a state '}, xlabel='year', ylabel='Total number of votes obtained'>
#Extract data as per state
def extract_party_data(state_name):
t1=df['state']==state_name
df_new1=df[t1]
return df_new1
m=extract_party_data('CALIFORNIA')
m
| year | state | state_po | state_fips | state_cen | state_ic | office | candidate | party_detailed | writein | candidatevotes | totalvotes | version | notes | party_simplified | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 23 | 1976 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | FORD, GERALD | REPUBLICAN | False | 3882244 | 7803770 | 20210113 | NO_DATA | REPUBLICAN |
| 24 | 1976 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | CARTER, JIMMY | DEMOCRAT | False | 3742284 | 7803770 | 20210113 | NO_DATA | DEMOCRAT |
| 25 | 1976 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | MACBRIDE, ROGER | INDEPENDENT | False | 56388 | 7803770 | 20210113 | NO_DATA | OTHER |
| 26 | 1976 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | MADDOX, LESTER | AMERICAN INDEPENDENT PARTY | False | 51098 | 7803770 | 20210113 | NO_DATA | OTHER |
| 27 | 1976 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | WRIGHT, MARGARET | PEACE & FREEDOM | False | 41731 | 7803770 | 20210113 | NO_DATA | OTHER |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3779 | 2020 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | CARROLL, BRIAN | AMERICAN SOLIDARITY PARTY | True | 2605 | 17500881 | 20210113 | NO_DATA | OTHER |
| 3780 | 2020 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | JANOS, JAMES G. "JESSE VENTURA" | INDEPENDENT | True | 611 | 17500881 | 20210113 | NO_DATA | OTHER |
| 3781 | 2020 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | MARK CHARLES | INDEPENDENT | True | 559 | 17500881 | 20210113 | NO_DATA | OTHER |
| 3782 | 2020 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | PIERCE, BROCK | INDEPENDENT | True | 185 | 17500881 | 20210113 | NO_DATA | OTHER |
| 3783 | 2020 | CALIFORNIA | CA | 6 | 93 | 71 | US PRESIDENT | JOSEPH KISHORE | INDEPENDENT | True | 121 | 17500881 | 20210113 | NO_DATA | OTHER |
85 rows × 15 columns
#Which party won more number of times in California
m1=m.groupby('party_simplified')
plt.figure(figsize=(10,6))
plt.xlabel('Party')
plt.ylabel('Total number of votes obtained')
m1['totalvotes'].sum().sort_values(ascending=False).head(5).plot(kind='bar',title='Total number of votes obtained for each candidate ')
<AxesSubplot:title={'center':'Total number of votes obtained for each candidate '}, xlabel='party_simplified', ylabel='Total number of votes obtained'>
df['state'].value_counts().head(10).plot(kind='bar')
<AxesSubplot:>
state = df.groupby('state')['totalvotes'].sum().reset_index()
States = state.sort_values('totalvotes',ascending=False).head(10)
fig = px.pie(States, values=States['totalvotes'], names=States['state'])
fig.update_layout(title = 'Top 10 States with Most Votes')
fig.show()